library(Rcpp)STATS 506 HW 5-kjaewon
Github Repository: https://github.com/kjaewon-umich/STATS506
Problem 1.
Setup
a.
#' Rational Number Class
#'
#' An S4 class to represent a rational number with a numerator and denominator.
#'
#' @slot numerator An integer representing the numerator of the rational number.
#' @slot denominator An integer representing the denominator of the rational number.
#' @prototype A rational number with numerator `1L` and denominator `1L`.
#' @validity Ensures that the denominator is not zero.
#'
#' @examples
#' r <- rational(3, 4)
#' show(r)
setClass(
"rational",
slots = c(numerator = "integer", denominator = "integer"),
prototype = list(numerator = 1L, denominator = 1L),
validity = function(object) {
if (object@denominator == 0L) {
return("Denominator cannot be zero.")
}
TRUE
}
)
#' Rational Number Constructor
#'
#' Creates an instance of the `rational` class.
#'
#' @param numerator An integer for the numerator. Defaults to `1L`.
#' @param denominator An integer for the denominator. Defaults to `1L`.
#'
#' @return A `rational` object.
#' @examples
#' r <- rational(3, 4)
#' show(r)
rational <- function(numerator = 1L, denominator = 1L) {
new("rational", numerator = as.integer(numerator),
denominator = as.integer(denominator))
}
#' Compute GCD and LCM
#'
#' Functions to compute the greatest common divisor (GCD) and least common
#' multiple (LCM).
#'
#' @param a An integer.
#' @param b An integer.
#'
#' @return The GCD or LCM of `a` and `b`.
#' @examples
#' gcd(12, 15) # Output: 3
#' lcm(12, 15) # Output: 60
cppFunction('
int gcd(int a, int b) {
while (b != 0) {
int temp = b;
b = a % b;
a = temp;
}
return std::abs(a);
}
int lcm(int a, int b) {
return std::abs(a * b) / gcd(a, b); // LCM formula using GCD
}
')
#' Simplify a Rational Number
#'
#' A method to simplify a `rational` object by reducing the numerator and
#' denominator by their GCD.
#'
#' @param x A `rational` object.
#'
#' @return A simplified `rational` object.
#' @examples
#' r <- rational(6, 8)
#' simplify(r) # Output: 3/4
setGeneric("simplify", function(x) standardGeneric("simplify"))[1] "simplify"
# Define the simplify method for rational objects
setMethod("simplify", "rational", function(x) {
divisor <- gcd(x@numerator, x@denominator)
x@numerator <- as.integer(x@numerator / divisor)
x@denominator <- as.integer(x@denominator / divisor)
# Ensure the denominator is positive
if (x@denominator < 0) {
x@numerator <- as.integer(-x@numerator)
x@denominator <- as.integer(-x@denominator)
}
x
})
#' Display a Rational Number
#'
#' A method to print a `rational` object in the format `numerator/denominator`.
#'
#' @param object A `rational` object.
#'
#' @examples
#' r <- rational(5, 10)
#' show(r) # Output: 1/2
setMethod("show", "rational", function(object) {
object <- simplify(object)
cat(object@numerator, "/", object@denominator, "\n")
})
#' Compute Quotient of a Rational Number
#'
#' A method to compute the floating-point value of a `rational` object.
#'
#' @param x A `rational` object.
#' @param digits An optional integer specifying the number of decimal places to
#' round to.
#'
#' @return A numeric value representing the quotient.
#' @examples
#' r <- rational(1, 3)
#' quotient(r) # Output: 0.3333...
#' quotient(r, digits = 2) # Output: 0.33
setGeneric("quotient", function(x, digits = NULL) standardGeneric("quotient"))[1] "quotient"
# Update the quotient method for rational objects with rounding
setMethod("quotient", "rational", function(x, digits = NULL) {
result <- x@numerator / x@denominator
if (!is.null(digits)) {
result <- round(result, digits)
}
result
})
#' Arithmetic Operations for Rational Numbers
#'
#' Methods for addition, subtraction, multiplication, and division of `rational`
#' objects.
#'
#' @param e1 A `rational` object.
#' @param e2 A `rational` object.
#'
#' @return A new `rational` object representing the result.
#' @examples
#' r1 <- rational(1, 3)
#' r2 <- rational(1, 6)
#' r1 + r2 # Output: 1/2
#' r1 - r2 # Output: 1/6
#' r1 * r2 # Output: 1/18
#' r1 / r2 # Output: 2
setMethod("+", c("rational", "rational"), function(e1, e2) {
common_denominator <- e1@denominator * e2@denominator
new_numerator <- e1@numerator * e2@denominator + e2@numerator * e1@denominator
simplify(rational(new_numerator, common_denominator))
})
# Define subtraction for two rational objects
setMethod("-", c("rational", "rational"), function(e1, e2) {
common_denominator <- e1@denominator * e2@denominator
new_numerator <- e1@numerator * e2@denominator - e2@numerator * e1@denominator
simplify(rational(new_numerator, common_denominator))
})
setMethod("*", c("rational", "rational"), function(e1, e2) {
new_numerator <- e1@numerator * e2@numerator
new_denominator <- e1@denominator * e2@denominator
simplify(rational(new_numerator, new_denominator))
})
# Define division for two rational objects
setMethod("/", c("rational", "rational"), function(e1, e2) {
new_numerator <- e1@numerator * e2@denominator
new_denominator <- e1@denominator * e2@numerator
simplify(rational(new_numerator, new_denominator))
})b.
# Testing the class with given test cases
r1 <- rational(24, 6)
r2 <- rational(7, 230)
r3 <- rational(0, 4)r14 / 1
r30 / 1
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'simplify': invalid class "rational" object: Denominator cannot be zero.
quotient(r1)[1] 4
quotient(r2)[1] 0.03043478
quotient(r2, digits = 3)[1] 0.03
quotient(r2, digits = 3.14)[1] 0.03
quotient(r2, digits = "avocado")Error in round(result, digits): non-numeric argument to mathematical function
q2 <- quotient(r2, digits = 3)
q2[1] 0.03
quotient(r3)[1] 0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
c.
#' Rational Number Class
#'
#' An S4 class to represent a rational number with enhanced validation for
#' the numerator and denominator.
#'
#' @slot numerator An integer representing the numerator of the rational number.
#' @slot denominator An integer representing the denominator of the rational number.
#' @prototype A rational number with numerator `1L` and denominator `1L`.
#' @validity Ensures that:
#' - The denominator is not zero.
#' - Neither the numerator nor the denominator is `NA`.
#' - Both numerator and denominator are integers.
#'
#' @examples
#' # Create a valid rational number
#' r <- rational(3, 4)
#' r
#'
#' # Attempt to create invalid rational numbers
#' try(rational(3, 0)) # Denominator cannot be zero
#' try(rational(NA, 5)) # Numerator cannot be NA
#' try(rational(5.5, 2)) # Numerator must be an integer
#'
setClass(
"rational",
slots = c(numerator = "integer", denominator = "integer"),
prototype = list(numerator = 1L, denominator = 1L),
validity = function(object) {
if (object@denominator == 0L) {
return("Denominator cannot be zero.")
}
if (is.na(object@numerator) || is.na(object@denominator)) {
return("Numerator or denominator cannot be NA.")
}
if (!is.integer(object@numerator) || !is.integer(object@denominator)) {
return("Numerator and denominator must be integers.")
}
TRUE
}
)
#' Rational Number Class
#'
#' An S4 class to represent a rational number with enhanced validation for
#' the numerator and denominator.
#'
#' @slot numerator An integer representing the numerator of the rational number.
#' @slot denominator An integer representing the denominator of the rational number.
#' @prototype A rational number with numerator `1L` and denominator `1L`.
#' @validity Ensures that:
#' - The denominator is not zero.
#' - Neither the numerator nor the denominator is `NA`.
#' - Both numerator and denominator are integers.
#'
#' @examples
#' # Create a valid rational number
#' r <- rational(3, 4)
#' r
#'
#' # Attempt to create invalid rational numbers
#' try(rational(3, 0)) # Denominator cannot be zero
#' try(rational(NA, 5)) # Numerator cannot be NA
#' try(rational(5.5, 2)) # Numerator must be an integer
#'
rational <- function(numerator = 1L, denominator = 1L) {
if (!is.integer(numerator)) numerator <- as.integer(numerator)
if (!is.integer(denominator)) denominator <- as.integer(denominator)
new("rational", numerator = numerator, denominator = denominator)
}
# Test Cases
try(rational(1, 0)) # Invalid: denominator is zeroError in validObject(.Object) :
invalid class "rational" object: Denominator cannot be zero.
try(rational(1, NA)) # Invalid: denominator is NAError in if (object@denominator == 0L) { :
missing value where TRUE/FALSE needed
try(rational(NA, 1)) # Invalid: numerator is NAError in validObject(.Object) :
invalid class "rational" object: Numerator or denominator cannot be NA.
try(rational("1", 2)) # Invalid: numerator is a string1 / 2
try(rational(1.5, 2)) # Invalid: numerator is a float1 / 2
# Valid Input
r4 <- rational(3, 4)
r43 / 4
Problem 2.
Setup
library(tidyverse)
library(plotly)
art <- read.csv("df_for_ml_improved_new_market.csv", header = TRUE)a.
From Problem Set 4, we observed that the distribution of art sales genres appears to change across years. To better visualize this trend, we preprocess the data using tidyverse to calculate the proportion of each genre for every year.
# Data Preparation
art$Genre___Others[art$Genre___Painting == 1] <- 0
art$genre <- "Photography" # Default genre
art$genre[art$Genre___Print == 1] <- "Print"
art$genre[art$Genre___Sculpture == 1] <- "Sculpture"
art$genre[art$Genre___Painting == 1] <- "Painting"
art$genre[art$Genre___Others == 1] <- "Other"
# Create proportions by year and genre
yeargenre <- with(art, table(year, genre))
ygperc <- yeargenre / rowSums(yeargenre)
ygpercm <- as.data.frame(as.table(ygperc))
colnames(ygpercm) <- c("year", "genre", "proportion")
# Ensure genres are factors with the correct order
ygpercm$genre <- factor(ygpercm$genre,
levels = c("Painting", "Sculpture","Photography",
"Print", "Other"))To address this question, we utilize the interactive features of the plotly package. The interactive plot highlights the genre name and its proportion of the total sales volume for each year, allowing users to explore the changes in the distribution dynamically.
# Create Plotly Horizontal Stacked Bar Plot
fig <- ygpercm %>%
plot_ly(
x = ~proportion, # Proportion on the x-axis
y = ~year, # Year on the y-axis
color = ~genre, # Color by genre
type = "bar", # Bar plot type
orientation = "h", # Horizontal orientation
text = ~paste(
"Genre:", genre, "<br>",
"Proportion:", round(proportion, 2)
), # Custom tooltip
hoverinfo = "text" # Display custom tooltip
) %>%
layout(
barmode = "stack", # Stack the bars
title = "Proportion of Genre of Art Sales",
xaxis = list(
title = "Proportion",
range = c(0, 1) # Ensure proportions are bounded between 0 and 1
),
yaxis = list(title = "Year"),
legend = list(title = list(text = "Genre"))
)
# Add arrow and text for "Other"
fig <- fig %>%
add_annotations(
x = 1.02, y = 15, # Position of "Other" annotation
text = "Other",
showarrow = TRUE,
ax = -20, ay = 0, # Adjust arrow length and angle
arrowhead = 2
)
# Display the plot
figFrom the visualization, we observe the following trends:
- Painting shows the most significant decline over time.
- Sculpture, Photography, and Other genres remain relatively stable.
- Print exhibits a noticeable increase in its proportion starting around 2000.
b.
From Problem Set 4, we observed a change in sales prices over time, with genre significantly influencing these trends. To better visualize this, we preprocess the data using tidyverse, calculating the median and 97.5th percentile prices by year and genre.
# Data Preparation
artmedian <- aggregate(art$price_usd, by = list(art$year, art$genre),
FUN = median, na.rm = TRUE)
names(artmedian) <- c("year", "genre", "price_usd")
art975 <- aggregate(art$price_usd, by = list(art$year, art$genre),
FUN = quantile, probs = 0.975, na.rm = TRUE)
names(art975) <- c("year", "genre", "price_usd")
# Combine median and 97.5th percentile
artcombine <- bind_rows(
artmedian %>% mutate(measure = "Median"),
art975 %>% mutate(measure = "97.5%")
)To leverage plotly package’s interactive features effectively, we highlight the price, genre, and year to provide detailed insights for each data point.
# Create Interactive Plotly Plot
fig2 <- artcombine %>%
plot_ly(
x = ~year,
y = ~price_usd,
color = ~genre,
linetype = ~measure,
type = "scatter",
mode = "lines",
text = ~paste(
"Year:", year, "<br>",
"Genre:", genre, "<br>",
"Measure:", measure, "<br>",
"Price (USD):", scales::comma(price_usd)
),
hoverinfo = "text"
) %>%
layout(
title = list(
text = "Changes in Sales Price by Genre Over Time",
font = list(size = 16, family = "Arial")
),
xaxis = list(
title = "Year",
range = c(1997, 2012),
tickvals = seq(1997, 2012, 2),
showgrid = TRUE,
zeroline = FALSE
),
yaxis = list(
title = "Price in Thousands USD",
tickvals = seq(0, 350000, 50000),
ticktext = paste0(seq(0, 350, 50), "k"),
showgrid = TRUE,
zeroline = TRUE
),
legend = list(
title = list(text = "Genre and Measure"),
orientation = "v", # Vertical orientation to avoid overlap
x = 1.1, y = 1, # Position the legend outside the plot area
xanchor = "left",
yanchor = "top"
),
margin = list(
l = 80, r = 120, b = 80, t = 80
) # Add margins for cleaner spacing
)
# Display the revised Plotly plot
fig2From the interactive visualization:
- Sculpture shows a consistent upward trend throughout the period.
- Painting exhibited a stable increase until 2007, followed by a sharp spike and a subsequent drastic decline.
- Photography and Print exhibit significant fluctuations over time.
- Other genres maintain a relatively stable trend with no notable changes from 2007.
Problem 3.
Setup
library(data.table)
library(nycflights13)
flights <- data.table(flights)
planes <- data.table(planes)a.
# Delay for Departure
departure <- merge(flights[, faa := origin],
airports,
by = "faa",
all.x = TRUE)
departure[, .(N = .N,
avg_delay = mean(dep_delay, na.rm = TRUE),
med_delay = median(dep_delay, na.rm = TRUE)),
by = name] |>
_[N >= 10, !"N"] |>
_[order(avg_delay, decreasing = TRUE)] name avg_delay med_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
# Delay for Arrival
arrival <- merge(flights[, faa := dest],
airports,
by = "faa",
all.x = TRUE)
arrival[, .(name = ifelse(is.na(first(name)), first(faa), first(name)),
N = .N,
avg_delay = mean(arr_delay, na.rm = TRUE),
med_delay = median(arr_delay, na.rm = TRUE)),
by = faa] |>
_[N >= 10, !c("faa", "N")] |>
_[order(avg_delay, decreasing = TRUE)] |>
print(x = _, nrows = 10000) name avg_delay med_delay
<char> <num> <num>
1: Columbia Metropolitan 41.76415094 28.0
2: Tulsa Intl 33.65986395 14.0
3: Will Rogers World 30.61904762 16.0
4: Jackson Hole Airport 28.09523810 15.0
5: Mc Ghee Tyson 24.06920415 2.0
6: Dane Co Rgnl Truax Fld 20.19604317 1.0
7: Richmond Intl 20.11125320 1.0
8: Akron Canton Regional Airport 19.69833729 3.0
9: Des Moines Intl 19.00573614 0.0
10: Gerald R Ford Intl 18.18956044 1.0
11: Birmingham Intl 16.87732342 -2.0
12: Theodore Francis Green State 16.23463687 1.0
13: Greenville-Spartanburg International 15.93544304 -0.5
14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
15: Savannah Hilton Head Intl 15.12950601 -1.0
16: Manchester Regional Airport 14.78755365 -3.0
17: Eppley Afld 14.69889841 -2.0
18: Yeager 14.67164179 -1.5
19: Kansas City Intl 14.51405836 0.0
20: Albany Intl 14.39712919 -4.0
21: General Mitchell Intl 14.16722038 0.0
22: Piedmont Triad 14.11260054 -2.0
23: Washington Dulles Intl 13.86420212 -3.0
24: Cherry Capital Airport 12.96842105 -10.0
25: James M Cox Dayton Intl 12.68048606 -3.0
26: Louisville International Airport 12.66938406 -2.0
27: Chicago Midway Intl 12.36422360 -1.0
28: Sacramento Intl 12.10992908 4.0
29: Jacksonville Intl 11.84483416 -2.0
30: Nashville Intl 11.81245891 -2.0
31: Portland Intl Jetport 11.66040210 -4.0
32: Greater Rochester Intl 11.56064461 -5.0
33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
34: Lambert St Louis Intl 11.07846451 -3.0
35: Norfolk Intl 10.94909344 -4.0
36: Baltimore Washington Intl 10.72673385 -5.0
37: Memphis Intl 10.64531435 -2.5
38: Port Columbus Intl 10.60132291 -3.0
39: Charleston Afb Intl 10.59296847 -4.0
40: Philadelphia Intl 10.12719014 -3.0
41: Raleigh Durham Intl 10.05238095 -3.0
42: Indianapolis Intl 9.94043412 -3.0
43: Charlottesville-Albemarle 9.50000000 -5.0
44: Cleveland Hopkins Intl 9.18161129 -5.0
45: Ronald Reagan Washington Natl 9.06695204 -2.0
46: Burlington Intl 8.95099602 -4.0
47: Buffalo Niagara Intl 8.94595186 -5.0
48: Syracuse Hancock Intl 8.90392501 -5.0
49: Denver Intl 8.60650021 -2.0
50: Palm Beach Intl 8.56297210 -3.0
51: BQN 8.24549550 -1.0
52: Bob Hope 8.17567568 -3.0
53: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
54: Bangor Intl 8.02793296 -9.0
55: Asheville Regional Airport 8.00383142 -1.0
56: PSE 7.87150838 0.0
57: Pittsburgh Intl 7.68099053 -5.0
58: Gallatin Field 7.60000000 -2.0
59: NW Arkansas Regional 7.46572581 -2.0
60: Tampa Intl 7.40852503 -4.0
61: Charlotte Douglas Intl 7.36031885 -3.0
62: Minneapolis St Paul Intl 7.27016886 -5.0
63: William P Hobby 7.17618819 -4.0
64: Bradley Intl 7.04854369 -10.0
65: San Antonio Intl 6.94537178 -9.0
66: South Bend Rgnl 6.50000000 -3.5
67: Louis Armstrong New Orleans Intl 6.49017497 -6.0
68: Key West Intl 6.35294118 7.0
69: Eagle Co Rgnl 6.30434783 -4.0
70: Austin Bergstrom Intl 6.01990875 -5.0
71: Chicago Ohare Intl 5.87661475 -8.0
72: Orlando Intl 5.45464309 -5.0
73: Detroit Metro Wayne Co 5.42996346 -7.0
74: Portland Intl 5.14157973 -5.0
75: Nantucket Mem 4.85227273 -3.0
76: Wilmington Intl 4.63551402 -7.0
77: Myrtle Beach Intl 4.60344828 -13.0
78: Albuquerque International Sunport 4.38188976 -5.5
79: George Bush Intercontinental 4.24079040 -5.0
80: Norman Y Mineta San Jose Intl 3.44817073 -7.0
81: Southwest Florida Intl 3.23814963 -5.0
82: San Diego Intl 3.13916574 -5.0
83: Sarasota Bradenton Intl 3.08243131 -5.0
84: Metropolitan Oakland Intl 3.07766990 -9.0
85: General Edward Lawrence Logan Intl 2.91439222 -9.0
86: San Francisco Intl 2.67289152 -8.0
87: SJU 2.52052659 -6.0
88: Yampa Valley 2.14285714 2.0
89: Phoenix Sky Harbor Intl 2.09704733 -6.0
90: Montrose Regional Airport 1.78571429 -10.5
91: Los Angeles Intl 0.54711094 -7.0
92: Dallas Fort Worth Intl 0.32212685 -9.0
93: Miami Intl 0.29905978 -9.0
94: Mc Carran Intl 0.25772849 -8.0
95: Salt Lake City Intl 0.17625459 -8.0
96: Long Beach -0.06202723 -10.0
97: Martha\\\\'s Vineyard -0.28571429 -11.0
98: Seattle Tacoma Intl -1.09909910 -11.0
99: Honolulu Intl -1.36519258 -7.0
100: STT -3.83590734 -9.0
101: John Wayne Arpt Orange Co -7.86822660 -11.0
102: Palm Springs Intl -12.72222222 -13.5
name avg_delay med_delay
b.
merged <- merge(flights,
planes,
by = "tailnum",
all.x = TRUE)
avg_speed <- merged[, `:=`(flights_num = .N,
avg_mph = mean(distance/(air_time/60), na.rm = TRUE)),
by = model]
avg_speed[avg_speed[, .I[which.max(avg_mph)]],.(model, avg_mph, flights_num)] model avg_mph flights_num
<char> <num> <int>
1: 777-222 482.6254 4